In [1]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
import matplotlib
import seaborn as sns
import scipy
from IPython.display import HTML
In [2]:
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')
Out[2]:
The simulation was performed on 10 different trees, used to generate alignments. For each tree, we gathered up to 5 calibration points (0, 1, 3, 5), and up to 50 constraints (0, 1, 3, 5, 20, 50). We ran the MCMC in each combination of conditions (=16 combinations), in 5 replicates, changing the nature of the calibrations and constraints. So in total we have $10~trees*24~conditions*5~replicates=1200~points$.
In [3]:
from ete3 import Tree, TreeStyle, NodeStyle
def readTreeFromFile(file):
try:
f=open(file, 'r')
except IOError:
print ("Unknown file: "+file)
sys.exit()
line = ""
for l in f:
line += l.strip()
f.close()
t = Tree( line )
return t
# chronograms:
files = ["AnalysisBEFORE06052019/FirstAttempt/extantTree_1_rescaled.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_2_rescaled.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_3_rescaled.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_4_rescaled.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_5_rescaled.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_6_rescaled.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_7_rescaled.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_8_rescaled.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_9_rescaled.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_10_rescaled.dnd"]
ts = TreeStyle()
ts.min_leaf_separation= 10
ts.scale = 2020
ts.show_leaf_name = False
nstyle = NodeStyle()
nstyle["size"] = 0
thickness = 1
vt_line_width = 2
hz_line_width = 2
ts.scale = thickness * 2400 # 120 pixels per branch length unit
#ts.min_leaf_separation= 10
ts.show_scale = False
trees = list()
i = 0
for f in files:
trees.append(readTreeFromFile(f))
for n in trees[i].traverse():
n.set_style(nstyle)
trees[i].render("extantTree"+str(i+1)+".png", tree_style=ts)
i = i +1
# rescaled trees:
files = ["AnalysisBEFORE06052019/FirstAttempt/extantTree_1_rescaled_altered.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_2_rescaled_altered.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_3_rescaled_altered.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_4_rescaled_altered.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_5_rescaled_altered.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_6_rescaled_altered.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_7_rescaled_altered.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_8_rescaled_altered.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_9_rescaled_altered.dnd","AnalysisBEFORE06052019/FirstAttempt/extantTree_10_rescaled_altered.dnd"]
ts = TreeStyle()
ts.min_leaf_separation= 10
ts.scale = 2020
ts.show_leaf_name = False
nstyle = NodeStyle()
nstyle["size"] = 0
thickness = 1
vt_line_width = 2
hz_line_width = 2
ts.scale = thickness * 2400 # 120 pixels per branch length unit
#ts.min_leaf_separation= 10
#ts.show_scale = False
trees = list()
i = 0
for f in files:
trees.append(readTreeFromFile(f))
for n in trees[i].traverse():
n.set_style(nstyle)
trees[i].render("extantTree"+str(i+1)+"_rescaled_altered.png", tree_style=ts)
i = i +1
In [4]:
colNames = ["treeId","numCalib","numCons","numReplicate","correlation", "rmsd", "correlation_bl", "rmsd_bl", "numNodes","numInHPD","fracInHPD","percent0","percent25","percent50","percent75","percent100"]
d = pd.read_csv ("resultAllTrees.txt", sep="\t", header=None, names=colNames)
In [5]:
d.describe()
Out[5]:
In [6]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["correlation_bl"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["correlation_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='Correlation in branch lengths')
Out[6]:
In [7]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["rmsd_bl"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["rmsd_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='RMSD in branch lengths')
Out[7]:
In [8]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["rmsd"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["rmsd"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='RMSD in node ages')
Out[8]:
In [9]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["correlation"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["correlation"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='Correlation with true dates')
Out[9]:
In [10]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["fracInHPD"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["fracInHPD"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='Fraction of dates in 95% HPD')
Out[10]:
In [11]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["percent50"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["percent50"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='Median size of the 95% HPD')
Out[11]:
In [12]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["rmsd_bl"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["rmsd_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='RMSD in branch lengths')
Out[12]:
In [13]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["correlation_bl"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["correlation_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='Correlation in branch lengths')
Out[13]:
In [14]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["rmsd"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["rmsd"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='RMSD in node ages')
Out[14]:
In [15]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["correlation"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["correlation"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='Correlation with true dates')
Out[15]:
In [16]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["fracInHPD"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["fracInHPD"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='Fraction in 95% HPD')
Out[16]:
In [17]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["percent50"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["percent50"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='Median size of the 95% HPD')
Out[17]:
In [18]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="rmsd_bl", x="numCalib", data=d)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average rmsd in branch lengths')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints", "20 constraints","50 constraints"])
Out[18]:
In [19]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="correlation_bl", x="numCalib", data=d)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average correlation in branch lengths')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints", "20 constraints","50 constraints"])
Out[19]:
In [20]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="rmsd", x="numCalib", data=d)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average rmsd in node ages')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints", "20 constraints","50 constraints"])
Out[20]:
In [21]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="correlation", x="numCalib", data=d)
ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average correlation in node ages')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints", "20 constraints","50 constraints"])
Out[21]:
In [22]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="fracInHPD", x="numCalib", data=d)
ax.set_ylim(50, 90)
ax.set(xlabel='Number of calibrations', ylabel='Average fraction in 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints", "20 constraints","50 constraints"])
Out[22]:
In [23]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="percent0", x="numCalib", data=d)
ax.set(xlabel='Number of calibrations', ylabel='Average smallest 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints", "20 constraints","50 constraints"])
Out[23]:
In [24]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="percent25", x="numCalib", data=d)
ax.set(xlabel='Number of calibrations', ylabel='Average 1st quartile 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints", "20 constraints","50 constraints"])
Out[24]:
In [25]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="percent50", x="numCalib", data=d)
ax.set(xlabel='Number of calibrations', ylabel='Average median 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints", "20 constraints","50 constraints"])
Out[25]:
In [26]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="percent75", x="numCalib", data=d)
ax.set(xlabel='Number of calibrations', ylabel='Average 3rd quartile 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints", "20 constraints","50 constraints"])
Out[26]:
In [27]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="percent100", x="numCalib", data=d)
ax.set(xlabel='Number of calibrations', ylabel='Average maximum 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints", "20 constraints","50 constraints"])
Out[27]:
Constraints and calibrations both improve accuracy. The number of constraints does not seem to have much of an effect on the size of the 95% HPD, contrary to calibrations, unless you get to lots of constraints (20, 5à or 100). However, when using large numbers of constraints, results are much improved, although with a lot of variance if there aren't any calibration. It is possible that on some trees convergence was difficult in the fully constrained runs.
First let's order the trees from good to bad according to the correlation in node ages when all constraints are used:
In [28]:
d_sel = d.loc[d['numCons'] == 100]
print(d_sel.sort_values(['correlation'], ascending=False))
In [29]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["treeId"], y=d["rmsd"], inner=None)
ax = sns.swarmplot(d["treeId"], y=d["rmsd"], color="white", edgecolor="gray")
ax.set(xlabel='Tree ID', ylabel='Rmsd in node ages')
Out[29]:
In [30]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["treeId"], y=d["correlation"], inner=None)
ax = sns.swarmplot(d["treeId"], y=d["correlation"], color="white", edgecolor="gray")
ax.set(xlabel='Tree ID', ylabel='Correlation in node ages')
Out[30]:
In [31]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["treeId"], y=d["rmsd_bl"], inner=None)
ax = sns.swarmplot(d["treeId"], y=d["rmsd_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Tree ID', ylabel='Rmsd in branch lengths')
Out[31]:
In [32]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["treeId"], y=d["correlation_bl"], inner=None)
ax = sns.swarmplot(d["treeId"], y=d["correlation_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Tree ID', ylabel='Correlation in branch lengths')
Out[32]:
In [33]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["treeId"], y=d["fracInHPD"], inner=None)
ax = sns.swarmplot(d["treeId"], y=d["fracInHPD"], color="white", edgecolor="gray")
ax.set(xlabel='Tree ID', ylabel='Fraction in 95% HPD')
Out[33]:
In [34]:
%matplotlib inline
ax = sns.barplot(hue="numReplicate", y="fracInHPD", x="treeId", data=d)
ax.set(xlabel='Tree ID', ylabel='Average fraction in 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["Replicate 1", "Replicate 2", "Replicate 3", "Replicate 4", "Replicate 5"])
Out[34]:
In [35]:
d_easy = d.loc[d['treeId'].isin([3,8])]
d_easy.describe()
Out[35]:
In [36]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="rmsd_bl", x="numCalib", data=d_easy)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average rmsd in branch lengths, easy trees')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints","20 constraints","50 constraints" ])
Out[36]:
In [37]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="correlation_bl", x="numCalib", data=d_easy)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average correlation in branch lengths, easy trees')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints","20 constraints","50 constraints" ])
Out[37]:
In [38]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="rmsd", x="numCalib", data=d_easy)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average rmsd in node ages, easy trees')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints","20 constraints","50 constraints" ])
Out[38]:
In [39]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="correlation", x="numCalib", data=d_easy)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average correlation in node ages, easy trees')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints","20 constraints","50 constraints" ])
Out[39]:
In [40]:
d_hard = d.loc[d['treeId'].isin([2,10])]
d_hard.describe()
Out[40]:
In [41]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="rmsd_bl", x="numCalib", data=d_hard)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average rmsd in branch lengths, hard trees')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","10 constraints","20 constraints","50 constraints" ])
Out[41]:
In [42]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="correlation_bl", x="numCalib", data=d_hard)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average correlation in branch lengths, hard trees')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","20 constraints","50 constraints","100 constraints" ])
Out[42]:
In [43]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="rmsd", x="numCalib", data=d_hard)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average rmsd in node ages, hard trees')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","20 constraints","50 constraints","100 constraints" ])
Out[43]:
In [44]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="correlation", x="numCalib", data=d_hard)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average correlation in node ages, hard trees')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints","20 constraints","50 constraints","100 constraints" ])
Out[44]:
In [45]:
#groupedByReplicates = d.groupby(["treeId","numCalib","numCons"], group_keys=True)
#groupedByReplicates.describe()
#groupedByReplicates.columns = ["_".join(x) for x in groupedByReplicates.columns.ravel()]
#groupedByReplicates.describe()
#groupedByReplicates["correlation"]
cors=pd.DataFrame({'var' : d.groupby( ["treeId","numCalib","numCons"] )["correlation"].std()}).reset_index()
# Let's drop the case with 100 constraints
cors = cors[cors["numCons"] != 100]
cors = cors.reset_index(drop=True)
#cors.head(130)
#pd.boxplot_frame_groupby(groupedByReplicates)
#groupedByReplicates["correlation"].boxplot()
#print(d.groupby(["treeId","numCalib","numCons"])["correlation"].var().unstack())
#d.groupby(["treeId","numCalib","numCons"])["correlation"].var().unstack().boxplot()
In [46]:
%matplotlib inline
# Select the color map named rainbow
cmap = plt.cm.get_cmap(name='tab10')
fig, ax = plt.subplots(figsize=(20, 10))
for i in range(240):
mark = ""
if i < 24:
mark="o"
elif i < 48:
mark="v"
elif i < 72:
mark="^"
elif i < 96:
mark="<"
elif i < 120:
mark=">"
elif i < 144:
mark="8"
elif i < 168:
mark="s"
elif i < 192:
mark="p"
elif i < 216:
mark="*"
else:
mark="h"
ax.plot(i, cors["var"][i], mark, color = cmap(i % 6))
patches=list()
patches.append(matplotlib.patches.Patch(color=cmap(0), label='0 constraint'))
patches.append(matplotlib.patches.Patch(color=cmap(1), label='1 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(2), label='3 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(3), label='5 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(3), label='20 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(3), label='50 constraints'))
ax.legend(handles=patches)
plt.xlabel("Condition", fontsize=15)
plt.ylabel("Standard deviation in correlation", fontsize=15)
Out[46]:
In [47]:
%matplotlib inline
rmsd=pd.DataFrame({'var' : d.groupby( ["treeId","numCalib","numCons"] )["rmsd"].std()}).reset_index()
# Let's drop the case with 100 constraints
rmsd = rmsd[rmsd["numCons"] != 100]
rmsd = rmsd.reset_index(drop=True)
rmsd.head(130)
# Select the color map
cmap = plt.cm.get_cmap(name='tab10')
fig, ax = plt.subplots(figsize=(20, 10))
for i in range(240):
mark = ""
if i < 24:
mark="o"
elif i < 48:
mark="v"
elif i < 72:
mark="^"
elif i < 96:
mark="<"
elif i < 120:
mark=">"
elif i < 144:
mark="8"
elif i < 168:
mark="s"
elif i < 192:
mark="p"
elif i < 216:
mark="*"
else:
mark="h"
ax.plot(i, rmsd["var"][i], mark, color = cmap(i % 6))
patches=list()
patches.append(matplotlib.patches.Patch(color=cmap(0), label='0 constraint'))
patches.append(matplotlib.patches.Patch(color=cmap(1), label='1 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(2), label='3 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(3), label='5 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(4), label='20 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(5), label='50 constraints'))
ax.legend(handles=patches)
plt.xlabel("Condition", fontsize=15)
plt.ylabel("Standard deviation in node age rmsd", fontsize=15)
Out[47]:
In [48]:
%matplotlib inline
fracInHPD=pd.DataFrame({'var' : d.groupby( ["treeId","numCalib","numCons"] )["fracInHPD"].std()}).reset_index()
# Let's drop the case with 100 constraints
fracInHPD = fracInHPD[fracInHPD["numCons"] != 100]
fracInHPD = fracInHPD.reset_index(drop=True)
fracInHPD.head(130)
# Select the color map
cmap = plt.cm.get_cmap(name='tab10')
fig, ax = plt.subplots(figsize=(20, 10))
for i in range(240):
mark = ""
if i < 24:
mark="o"
elif i < 48:
mark="v"
elif i < 72:
mark="^"
elif i < 96:
mark="<"
elif i < 120:
mark=">"
elif i < 144:
mark="8"
elif i < 168:
mark="s"
elif i < 192:
mark="p"
elif i < 216:
mark="*"
else:
mark="h"
ax.plot(i, fracInHPD["var"][i], mark, color = cmap(i % 6))
patches=list()
patches.append(matplotlib.patches.Patch(color=cmap(0), label='0 constraint'))
patches.append(matplotlib.patches.Patch(color=cmap(1), label='1 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(2), label='3 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(3), label='5 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(4), label='20 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(5), label='50 constraints'))
ax.legend(handles=patches)
plt.xlabel("Condition", fontsize=15)
plt.ylabel("Standard deviation in fraction of nodes in 95% HPD", fontsize=15)
Out[48]:
In [49]:
%matplotlib inline
percent50=pd.DataFrame({'var' : d.groupby( ["treeId","numCalib","numCons"] )["percent50"].std()}).reset_index()
# Select the color map
cmap = plt.cm.get_cmap(name='tab10')
fig, ax = plt.subplots(figsize=(20, 10))
for i in range(240):
mark = ""
if i < 24:
mark="o"
elif i < 48:
mark="v"
elif i < 72:
mark="^"
elif i < 96:
mark="<"
elif i < 120:
mark=">"
elif i < 144:
mark="8"
elif i < 168:
mark="s"
elif i < 192:
mark="p"
elif i < 216:
mark="*"
else:
mark="h"
ax.plot(i, percent50["var"][i], mark, color = cmap(i % 4))
patches=list()
patches.append(matplotlib.patches.Patch(color=cmap(0), label='0 constraint'))
patches.append(matplotlib.patches.Patch(color=cmap(1), label='1 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(2), label='3 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(3), label='5 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(4), label='20 constraints'))
patches.append(matplotlib.patches.Patch(color=cmap(5), label='50 constraints'))
ax.legend(handles=patches)
plt.xlabel("Condition", fontsize=15)
plt.ylabel("Standard deviation in median 95% HPD size", fontsize=15)
Out[49]:
It's hard to come up with much to say about the above graphs, except that even controlling for a given number of constraints on a given tree, there can be variation in the accuracy or the 95% HPD. This is expected given that constraints could be positioned on different nodes of the tree.
In [50]:
# Group analyses by tree, and recover correlation only
cors=pd.DataFrame({'var' : d.groupby( ["treeId","numCons", "numCalib"] )["correlation"].std()}).reset_index()
# Let's drop the case with 100 constraints
cors = cors[cors["numCons"] != 100]
cors = cors.reset_index(drop=True)
cors.head(130)
Out[50]:
In [51]:
%matplotlib inline
# Select the color map named tab10
cmap = plt.cm.get_cmap(name='tab10')
fig, ax = plt.subplots(figsize=(20, 10))
for i in range(240):
mark = ""
if i < 24:
mark="o"
elif i < 48:
mark="v"
elif i < 72:
mark="^"
elif i < 96:
mark="<"
elif i < 120:
mark=">"
elif i < 144:
mark="8"
elif i < 168:
mark="s"
elif i < 192:
mark="p"
elif i < 216:
mark="*"
else:
mark="h"
ax.plot(i, cors["var"][i], mark, color = cmap(i % 4))
patches=list()
patches.append(matplotlib.patches.Patch(color=cmap(0), label='0 calibration'))
patches.append(matplotlib.patches.Patch(color=cmap(1), label='1 calibrations'))
patches.append(matplotlib.patches.Patch(color=cmap(2), label='3 calibrations'))
patches.append(matplotlib.patches.Patch(color=cmap(3), label='5 calibrations'))
ax.legend(handles=patches)
plt.xlabel("Condition", fontsize=15)
plt.ylabel("Standard deviation in correlation", fontsize=15)
Out[51]:
In [52]:
%matplotlib inline
fracInHPD=pd.DataFrame({'var' : d.groupby( ["treeId","numCons", "numCalib"] )["rmsd"].std()}).reset_index()
# Let's drop the case with 100 constraints
rmsd = rmsd[rmsd["numCons"] != 100]
rmsd = rmsd.reset_index(drop=True)
rmsd.head(130)
fig, ax = plt.subplots(figsize=(20, 10))
for i in range(240):
mark = ""
if i < 24:
mark="o"
elif i < 48:
mark="v"
elif i < 72:
mark="^"
elif i < 96:
mark="<"
elif i < 120:
mark=">"
elif i < 144:
mark="8"
elif i < 168:
mark="s"
elif i < 192:
mark="p"
elif i < 216:
mark="*"
else:
mark="h"
ax.plot(i, rmsd["var"][i], mark, color = cmap(i % 4))
patches=list()
patches.append(matplotlib.patches.Patch(color=cmap(0), label='0 calibration'))
patches.append(matplotlib.patches.Patch(color=cmap(1), label='1 calibrations'))
patches.append(matplotlib.patches.Patch(color=cmap(2), label='3 calibrations'))
patches.append(matplotlib.patches.Patch(color=cmap(3), label='5 calibrations'))
ax.legend(handles=patches)
plt.xlabel("Condition", fontsize=15)
plt.ylabel("Standard deviation in node age rmsd", fontsize=15)
Out[52]:
In [53]:
%matplotlib inline
fracInHPD=pd.DataFrame({'var' : d.groupby( ["treeId","numCons", "numCalib"] )["fracInHPD"].std()}).reset_index()
# Let's drop the case with 100 constraints
fracInHPD = fracInHPD[fracInHPD["numCons"] != 100]
fracInHPD = fracInHPD.reset_index(drop=True)
fracInHPD.head(130)
fig, ax = plt.subplots(figsize=(20, 10))
for i in range(240):
mark = ""
if i < 24:
mark="o"
elif i < 48:
mark="v"
elif i < 72:
mark="^"
elif i < 96:
mark="<"
elif i < 120:
mark=">"
elif i < 144:
mark="8"
elif i < 168:
mark="s"
elif i < 192:
mark="p"
elif i < 216:
mark="*"
else:
mark="h"
ax.plot(i, fracInHPD["var"][i], mark, color = cmap(i % 4))
patches=list()
patches.append(matplotlib.patches.Patch(color=cmap(0), label='0 calibration'))
patches.append(matplotlib.patches.Patch(color=cmap(1), label='1 calibrations'))
patches.append(matplotlib.patches.Patch(color=cmap(2), label='3 calibrations'))
patches.append(matplotlib.patches.Patch(color=cmap(3), label='5 calibrations'))
ax.legend(handles=patches)
plt.xlabel("Condition", fontsize=15)
plt.ylabel("Standard deviation in fraction of nodes in 95% HPD", fontsize=15)
Out[53]:
In [54]:
%matplotlib inline
percent50=pd.DataFrame({'var' : d.groupby( ["treeId","numCons", "numCalib"] )["percent50"].std()}).reset_index()
# Let's drop the case with 100 constraints
percent50 = percent50[percent50["numCons"] != 100]
percent50 = percent50.reset_index(drop=True)
percent50.head(130)
fig, ax = plt.subplots(figsize=(20, 10))
for i in range(240):
mark = ""
if i < 24:
mark="o"
elif i < 48:
mark="v"
elif i < 72:
mark="^"
elif i < 96:
mark="<"
elif i < 120:
mark=">"
elif i < 144:
mark="8"
elif i < 168:
mark="s"
elif i < 192:
mark="p"
elif i < 216:
mark="*"
else:
mark="h"
ax.plot(i, percent50["var"][i], mark, color = cmap(i % 4))
patches=list()
patches.append(matplotlib.patches.Patch(color=cmap(0), label='0 calibration'))
patches.append(matplotlib.patches.Patch(color=cmap(1), label='1 calibrations'))
patches.append(matplotlib.patches.Patch(color=cmap(2), label='3 calibrations'))
patches.append(matplotlib.patches.Patch(color=cmap(3), label='5 calibrations'))
ax.legend(handles=patches)
plt.xlabel("Condition", fontsize=15)
plt.ylabel("Standard deviation in median 95% HPD size", fontsize=15)
Out[54]:
In [ ]:
In [55]:
d0=pd.read_csv ("output/9_0_0_1.log", sep="\t")
d1=pd.read_csv ("output/9_0_1_1.log", sep="\t")
d3=pd.read_csv ("output/9_0_3_1.log", sep="\t")
d5=pd.read_csv ("output/9_0_5_1.log", sep="\t")
d10=pd.read_csv ("output/9_0_10_1.log", sep="\t")
d20=pd.read_csv ("output/9_0_20_1.log", sep="\t")
d50=pd.read_csv ("output/9_0_50_1.log", sep="\t")
d1.describe()
Out[55]:
In [56]:
%matplotlib inline
fig, ax = plt.subplots(7, figsize=(20, 10))
p1, = ax[0].plot(d0['Iteration'], d0['Posterior'], 'b-')
p2, = ax[1].plot(d1['Iteration'], d1['Posterior'], 'b-')
p3, = ax[2].plot(d3['Iteration'], d3['Posterior'], 'b-')
p4, = ax[3].plot(d5['Iteration'], d5['Posterior'], 'b-')
p5, = ax[4].plot(d10['Iteration'], d10['Posterior'], 'b-')
p6, = ax[5].plot(d20['Iteration'], d20['Posterior'], 'b-')
p7, = ax[6].plot(d50['Iteration'], d50['Posterior'], 'b-')
#cal, = ax[1].plot(d2['Iteration'], d2['Posterior'], 'g-')
#con, = ax[2].plot(d3['Iteration'], d3['Posterior'], 'r-')
ax[0].legend([p1], ['0 constraint'], loc='lower right')
ax[1].legend([p2], ['1 constraint'], loc='lower right')
ax[2].legend([p3], ['3 constraints'], loc='lower right')
ax[3].legend([p4], ['5 constraints'], loc='lower right')
ax[4].legend([p5], ['10 constraints'], loc='lower right')
ax[5].legend([p6], ['20 constraints'], loc='lower right')
ax[6].legend([p7], ['50 constraints'], loc='lower right')
#ax[1].legend([cal], ['Run 2'], loc='lower right')
#ax[2].legend([con], ['Run 3'], loc='lower right')
plt.xlabel("Iteration", fontsize=15)
plt.ylabel("Posterior", fontsize=15);
From these analyses, I would say:
In [ ]: